* Kerr, Kerr, and Smith, AHA
* Raw data preparation file
* stata-mp8-10g -b do aha-prep-1.do
* Last Modified: March 2025

* Close any log still open from an earlier run (a failure is shown but does
* not stop the script), move to the project root, and open a fresh log.
capture noisily log close
cd "/export/projects2/wkerr_AHA_project/replication"
log using aha-prep-1.log, replace

* Session settings: start with empty memory, cap memory use, no paging.
clear
set max_memory 50g
set more off


**create imrs**
***************
* Spouse lookup table: rows from 1990 onward that carry a spouse pointer.
* The pointer (sploc) is renamed to pernum so that a later merge on pernum
* attaches these characteristics, prefixed sp_, to the record being pointed at.
use year multyear serial bpl sex ancestr1 perwt sploc educ ///
    if year>=1990 & sploc!=0 ///
    using ./data-input/AllYears-prep1dataset.dta, clear
rename sploc pernum
rename perwt sp_wtdobs
foreach v of varlist bpl sex ancestr1 educ {
    rename `v' sp_`v'
}
compress
save ./temp/sploc.dta, replace


**create male file to link dad characteristics to AHA children**
***************
* Father lookup table: rows with sex==1 from 1990 onward, keyed by pernum
* renamed to poploc so children can be matched through their poploc pointer.
use year multyear serial pernum bpl ancestr1 ancestr2 perwt educ age vetvietn sex ///
    if year>=1990 & sex==1 ///
    using ./data-input/AllYears-prep1dataset.dta, clear
rename pernum poploc

* Person weight where vetvietn==2, zero otherwise.
generate f_vvet = perwt*(vetvietn==2)
drop vetvietn

rename perwt f_wtdobs
foreach v of varlist bpl ancestr1 ancestr2 educ age {
    rename `v' f_`v'
}
compress

save ./temp/poploc.dta, replace
	

**create female file to link mom characteristics to AHA children**
***************
* Mother lookup table: rows with sex==2 from 1990 onward, keyed by pernum
* renamed to momloc so children can be matched through their momloc pointer.
use year multyear serial pernum bpl ancestr1 ancestr2 perwt educ age sex ///
    if year>=1990 & sex==2 ///
    using ./data-input/AllYears-prep1dataset.dta, clear
rename pernum momloc

rename perwt m_wtdobs
foreach v of varlist bpl ancestr1 ancestr2 educ age {
    rename `v' m_`v'
}
compress

save ./temp/momloc.dta, replace

********************
* Load the full input file; everything below builds on this dataset.
use ./data-input/AllYears-prep1dataset.dta, clear

**Map pre-2011 metarea data to 2011 and after met2013 data**

* Fold code 4482 into 4481.
* NOTE(review): this edits metaread, while every merge below keys on metarea -- confirm which variable is intended.
replace metaread=4481 if metaread==4482

**pull in clusite and other potential CEM variables and the urban variable**

* Step 1: remap metarea codes using the pre-2011 crosswalk (only where a replacement code is supplied).
merge m:1 metarea using ./data-input/pre2011mappings.dta
replace metarea=metamerge if metamerge!=.
drop _m metarea_lbl metamerge metamerge_lbl

* Step 2: remap met2013 codes using the post-2011 crosswalk (only where a replacement code is supplied).
merge m:1 met2013 using ./data-input/post2011mappings.dta
replace met2013=met2013merge if met2013merge!=.
drop _m met2013_lbl met2013merge met2013merge_lbl

* Step 3: translate met2013 into the metarea coding.
* NOTE(review): the condition tests met2013, not mapmeta, so a row with a non-missing met2013 that finds no match in the crosswalk ends up with metarea set to missing -- confirm this is intended.
merge m:1 met2013 using ./data-input/met2013tometarea.dta
replace metarea=mapmeta if met2013!=.
drop _m mapmeta mapmeta_lbl met2013 met2013_lbl

* Attach metro-level attributes; with update replace, values from the using file overwrite any existing master values of these variables.
merge m:1 metarea using ./data-input/clusites.dta, keepusing(clusite setserv fmhpi distca region) update replace
drop _m

tab clusite

* Attach the urban indicator, again letting the using file overwrite.
merge m:1 metarea using ./data-input/urbruralmapping, keepusing(urban) update replace
drop _m

tab urban

* Rows with no year are removed here; rows contributed only by the lookup files above have no year in the master data.
drop if year==.

* One-second pause (purpose not evident from this file).
sleep 1000

compress

* Overwrite the text of four entries in the metarea value label.
label define metarea_lbl 168 "Cleveland-Lorain-Elyria, OH", modify
label define metarea_lbl 476 "Manchester-Nashua, NH", modify
label define metarea_lbl 760 "Seattle-Tacoma-Everett, WA", modify
label define metarea_lbl 500 "Miami-West Palm, FL", modify

* Attach occupation descriptions by occ code.
* NOTE(review): unmatched rows from the occupation file are not dropped here; presumably they are removed later by the drop on missing perwt -- confirm.
merge m:1 occ using ./data-input/2018occcodes.dta
drop _m
rename description occdesc

**********************************************
display "**Build the core worker samples for analysis**"
**********************************************
* Birth year is survey year minus age; the actual survey year (multyear)
* takes precedence whenever it is recorded.
generate byr = cond(multyear!=., multyear-age, year-age)
label variable byr "Birth year"
label variable multyear "The actual year of survey in multi-year ACS samples"

* Conditions shared by several flags below.
local arrived89 "yrimmig>=1989 & yrimmig!=."
local anyimm    "yrimmig!=. & yrimmig>0 & yrimmig!=996 & byr!=."

* Every flag in this section is weighted: it holds perwt when the condition
* is met and 0 otherwise.
generate vAHA = (inrange(byr, 1962, 1975) & `arrived89' & bpld==51800)*perwt
label variable vAHA "Amerasians eligible for AHA immigration (wtd)"

* Weighted count of AHA-eligible people in the same family unit.
sort year serial famunit
sleep 1000
by year serial famunit: egen hhAHA = total(vAHA)

* Same-birthplace arrivals from 1989 on who share a family unit with an
* AHA-eligible person but are not themselves eligible.
generate vfAHA = cond(vAHA>0, 0, (`arrived89' & hhAHA>0 & bpld==51800 & byr!=.)*perwt)
drop hhAHA
label variable vfAHA "Potential family members of AHA"

generate imm = (`anyimm')*perwt
label variable imm "Immigrants to the US (wtd)"

generate vi = (`anyimm' & bpld==51800)*perwt
generate obsvi = vi/perwt
label variable vi "Vietnamese immigrant (person-weighted)"
label variable obsvi "Vietnamese immigrant (obs)"

* Split Vietnamese immigrants into AHA-related and non-AHA-related.
generate vnAHA = cond(vAHA>0 | vfAHA>0, 0, vi)
generate vtAHA = vAHA + vfAHA
label variable vnAHA "Non-AHA-related Vietnamese immigrants (person-weighted)"
label variable vtAHA "Combined potential AHA-related immigrants (AHA and potential family members)"

generate vncAHA = vi - vAHA
label variable vncAHA "Vietnamese immigrants excluding AHA immigrants (person-weighted)"

* Vietnamese by language or ancestry code, excluding those already counted
* as Vietnamese immigrants.
generate vd = cond(vi>0, 0, (languaged==5000 | ancestr1d==7850 | ancestr2d==7850)*perwt)
generate obsvnAHA = vnAHA/perwt
label variable vd "Of Vietnamese descent or ancestry"
label variable obsvnAHA "Non-AHA-related Vietnamese immigrants (obs)"

* Weighted service flags (perwt when the condition holds, 0 otherwise).
* The Vietnam-era flag is forced to 0 for sex==2.
generate vvet = cond(sex==2, 0, perwt*(vetvietn==2))
generate vet = perwt*(vetstat==2)
generate military = perwt*inrange(vetstatd, 12, 23)
label variable vvet "Vietnam War veteran"
label variable vet "Veteran"
label variable military "Military service member"

* Discard rows that carry no usable person weight.
drop if perwt==. | perwt==0

sort year yrimmig

*calculate age at immigration
* Defined only when both the immigration year and the birth year are
* non-missing and non-zero.
generate ageimm = yrimmig-byr if yrimmig!=. & yrimmig!=0 & byr!=. & byr!=0
label variable ageimm "Age at immigration"


*Create variable for years of education
* The educd ranges below are mutually exclusive, so a single nested cond()
* assigns the same values as a sequence of replace statements; codes not
* listed stay missing.
generate yrseduc = ///
    cond(educd>2 & educd<=30,            9,    ///
    cond(educd==40,                      10,   ///
    cond(educd==50,                      11,   ///
    cond(inlist(educd, 60, 61, 62, 64),  11.5, ///
    cond(educd==63,                      12,   ///
    cond(educd==65,                      12.5, ///
    cond(inlist(educd, 70, 71),          13,   ///
    cond(inlist(educd, 80, 81, 82, 83),  14,   ///
    cond(educd==90,                      15,   ///
    cond(inlist(educd, 100, 101),        16,   ///
    cond(inlist(educd, 114, 115, 116),   17, .)))))))))))

* Four-category text version of attainment.
generate yrsed2 = "Less than HS" if educd>2 & educd<60
replace yrsed2 = "High school" if inlist(educd, 60, 61, 62, 63, 64)
replace yrsed2 = "Partial college" if educd>64 & educd<100
replace yrsed2 = "College and above" if educd>=100 & educd<999

*mark age group of immigrants at time of immigration
* Each bucket holds perwt for rows inside the age band and 0 elsewhere.
generate a0x13 = cond(ageimm>0 & ageimm<14, perwt, 0)
generate a14x18 = cond(inrange(ageimm, 14, 18), perwt, 0)
generate aover18 = cond(ageimm>18 & ageimm!=., perwt, 0)

* Weighted flag for labforce==2.
generate ylforce = cond(labforce==2, perwt, 0)

***income and wages***
* Blank out the top-of-range placeholder codes in the income variables.
replace ftotinc = . if ftotinc==9999999

replace incwage = . if inlist(incwage, 999998, 999999)

replace inctot = . if inctot==9999999

* Recode the -9995 value of inctot to -9900.
replace inctot = -9900 if inctot==-009995

* Four income bands based on inctot; missing inctot stays missing.
generate inccat = cond(inctot<20000, 1, ///
                  cond(inctot<40000, 2, ///
                  cond(inctot<60000, 3, ///
                  cond(inctot!=.,    4, .))))

label define inccat_lbl 1 "<$20,000" 2 "$20,000-39,999" 3 "$40,000-59,999" 4 ">=$60,000"
label values inccat inccat_lbl

* Numeric weeks-worked value assigned to each wkswork2 category.
generate wkswk = cond(wkswork2==1, 7,    ///
                 cond(wkswork2==2, 20,   ///
                 cond(wkswork2==3, 33,   ///
                 cond(wkswork2==4, 43.5, ///
                 cond(wkswork2==5, 48.5, ///
                 cond(wkswork2==6, 51, .))))))

replace valueh = . if valueh==9999999

compress

* Weighted English-ability bands keyed on the speakeng codes
* (perwt inside the band, 0 outside).
generate loweng = cond(inlist(speakeng, 1, 6), perwt, 0)
generate mideng = cond(inlist(speakeng, 4, 5), perwt, 0)
generate onlyeng = cond(speakeng==3, perwt, 0)

* Core AHA sample: AHA-eligible and arrived 1989-1995.
local coreaha "vAHA>0 & vAHA!=. & yrimmig>=1989 & yrimmig<=1995"

* Two-way split of the core sample by age at immigration.
generate agegrp = 0 if inrange(ageimm, 14, 17) & `coreaha'
replace agegrp = 1 if inrange(ageimm, 18, 21) & `coreaha'
label variable agegrp "Age group"

label define agegrp_lbl 0 "Ages 14 to 17" 1 "Ages 18 to 21"
label values agegrp agegrp_lbl

* Three-way split: adds a bucket for arrivals aged 22 and up.
generate agegrp3 = agegrp
replace agegrp3 = 2 if ageimm>=22 & ageimm!=. & `coreaha'
label variable agegrp3 "Age group (3 buckets)"
label define agegrp3_lbl 0 "Ages 14 to 17" 1 "Ages 18 to 21" 2 "Ages 22 and up"
label values agegrp3 agegrp3_lbl

* Comparison groups start from the AHA split (0/1).
generate compgrp = agegrp

* Rows with bpl<100 born 1968-1975 are split by race into groups 3 and 4
* (the original tagged them 2 first and then immediately moved every such
* row to 3 or 4, so code 2 never survives).
replace compgrp = cond(race==1, 3, 4) if bpl<100 & inrange(byr, 1968, 1975)

* Group 5: immigrants from the listed birthplaces with the same birth-year,
* arrival-year and arrival-age windows as the core AHA sample.
replace compgrp = 5 if inlist(bpld, 60020, 60021, 60073, 60022, 60023, 60024, 26020, 51200, 60045, 60092, 60046, 60030, 52140, 60051, 52150, 60034, 60055, 60056, 52100, 50000, 43000, 52120, 60012, 30040, 71032, 21060, 60031, 51500, 71013, 60015) ///
    & inrange(byr, 1968, 1975) & inrange(yrimmig, 1989, 1995) & inrange(ageimm, 14, 21)

label variable compgrp "Analysis comparison group"
label define compgrp_lbl 0 "Young AHA (14-17 y.o.)" 1 "Old AHA (18-21 y.o.)" 2 "Comparison group of US natives" 3 "Comparison group of white US natives" 4 "Comparison group of minority US natives" 5 "Comparison group of non-Vietnamese immigrants", replace
label values compgrp compgrp_lbl

* 0/1 indicator for AHA-eligible persons.
* vAHA is perwt for eligible rows and 0 otherwise, so eligibility must be
* tested with a strict >0 (the form used everywhere else in this file);
* the previous >=0 test flagged every row with a non-missing vAHA.
generate AHA = (vAHA>0 & vAHA!=.)

* poverty==0 is treated as missing.
replace poverty = . if poverty==0

* Welfare-receipt indicator: the 99999 code counts as 0, any other positive
* amount as 1, and remaining values are carried over unchanged.
generate incwfct = cond(incwelfr==99999, 0, cond(incwelfr>0 & incwelfr!=., 1, incwelfr))

* The 99999 code in the amount itself becomes missing.
replace incwelfr = . if incwelfr==99999

**merge in mom, pop, and spouse data**
drop if perwt==.

*poploc merge*

* Attach father characteristics through the poploc pointer; rows that exist
* only in the father file are discarded.
merge m:1 year multyear serial poploc using ./temp/poploc, keepusing(f_bpl f_ancestr1 f_ancestr2 f_vvet f_age)
drop if _merge==2
label variable f_age "Father's age"

* All father indicators require a matched father record (_merge==3).
generate f_natvvet = (_merge==3 & inrange(f_bpl, 0, 99) & f_vvet>0 & f_vvet!=.)
label variable f_natvvet "Father AND US-born AND Vietnam War vet"

generate f_vborn = (_merge==3 & f_bpl==518)
label variable f_vborn "Father AND Vietnam-born"

generate f_natvan = (_merge==3 & inrange(f_bpl, 0, 99) & (inrange(f_ancestr1, 785, 790) | inrange(f_ancestr2, 785, 790)))
label variable f_natvan "father AND US-born AND Vietnamese ancestry"

drop _merge f_bpl f_ancestr1 f_ancestr2 f_vvet

*momloc merge*

* Attach mother characteristics through the momloc pointer; rows that exist
* only in the mother file are discarded.
merge m:1 year multyear serial momloc using ./temp/momloc, keepusing(m_bpl m_ancestr1 m_ancestr2 m_age)
drop if _merge==2
label variable m_age "Mother's age"

* All mother indicators require a matched mother record (_merge==3).
generate m_vborn = (_merge==3 & m_bpl==518)
label variable m_vborn "Mother AND Vietnam-born"

generate m_natvan = (_merge==3 & inrange(m_bpl, 0, 99) & (inrange(m_ancestr1, 785, 790) | inrange(m_ancestr2, 785, 790)))
label variable m_natvan "Mother AND US-born AND Vietnamese ancestry"

generate m_van = (_merge==3 & (inrange(m_ancestr1, 785, 790) | inrange(m_ancestr2, 785, 790)))
label variable m_van "Mother AND Vietnamese ancestry"


drop _merge m_bpl m_ancestr1 m_ancestr2

*sploc merge*

* Attach spouse characteristics; the spouse file is keyed so that matching on
* pernum returns the characteristics of this person's spouse.
merge 1:1 year multyear serial pernum using ./temp/sploc, keepusing(sp_wtdobs sp_bpl sp_sex sp_ancestr1 sp_educ)
drop if _merge==2

* In-marriage indicators, defined only for rows with a matched spouse.
generate imr = ((bpl==sp_bpl) | (ancestr1==sp_ancestr1)) if _merge==3
generate imrx = (bpl==sp_bpl) if _merge==3
label variable imr "In-marriage rate"
label variable imrx "In-marriage rate strict"

* The same indicators restricted to Vietnamese immigrants.
generate vimr = (imr==1 & vi>0)
generate vimrx = (imrx==1 & vi>0)
label variable vimr "VNM immigrant in-marriage indicator (using bpl and ancestry)"
label variable vimrx "Strict VNM immigrant in-marriage indicator (using bpl only)"

* Collapse marst to 0/1: codes 3-6 become 0, codes 1-2 become 1.
replace marst = 0 if inlist(marst, 3, 4, 5, 6)
replace marst = 1 if inlist(marst, 1, 2)
label define marst_lbl 0 "not married" 1 "married", replace

* Alternative marital indicator: 1 when a spouse record was matched.
generate marsploc = (_merge==3)
label variable marsploc "Marital status calc. using sploc"

drop _merge

generate marnonbpl = (marst==1 & bpl!=sp_bpl)
label variable marnonbpl "Married to non-Vietnamese"

generate marnonanc = (marst==1 & bpl!=sp_bpl & ancestr1!=sp_ancestr1)
label variable marnonanc "Married to non-Vietnamese bpl and not same ancestry"

generate marnatsp = (sp_bpl<=120)
label variable marnatsp "Married to US native"

generate marcolsp = (sp_educ>=10 & sp_educ!=.)
label variable marcolsp "Married to college-educated spouse"

* Collapse gq to 0/1 in one simultaneous recode: code 0 becomes missing,
* codes 1-2 become 0, codes 3-6 become 1.
recode gq (0 = .) (1 2 = 0) (3 4 5 6 = 1)
label define gq_lbl 0 "Not in group quarters" 1 "In group quarters", replace

* Family total income divided by family size.
generate avgincpp = ftotinc/famsize

* Presence indicators: 1 when the corresponding pointer or count is positive
* and non-missing, 0 otherwise.
generate momhh = (momloc>0 & momloc!=.)
label variable momhh "Has mom in household"
generate dadhh = (poploc>0 & poploc!=.)
label variable dadhh "Has dad in household"
generate sphh = (sploc>0 & sploc!=.)
label variable sphh "Has spouse in household"
generate chhh = (nchild>0 & nchild!=.)
label variable chhh "Has child in household"
generate chl5hh = (nchlt5>0 & nchlt5!=.)
label variable chl5hh "Has child <5 y.o. in household"
generate sibhh = (nsibs>0 & nsibs!=.)
label variable sibhh "Has sibling in household"

* Relationship-to-head indicators built from the relate codes.
generate headhh = (relate==1)
label variable headhh "Is head of household"
generate sphhh = (relate==2)
label variable sphhh "Is spouse of head of household"
generate child = inlist(relate, 3, 4)
label variable child "Is child of head of household"
generate parent = inlist(relate, 5, 6)
label variable parent "Is parent of head of household"
generate sib = inlist(relate, 7, 8)
label variable sib "Is sibling of head of household"
generate othrel = inlist(relate, 9, 10)
label variable othrel "Is other relative of head of household"
generate nonrel = inlist(relate, 11, 12, 13)
label variable nonrel "Is not a relative of head of household"

generate wparent = (momhh==1 | dadhh==1)
label variable wparent "Living with parent"

* Collapse vetstat: code 1 joins code 0, then code 2 becomes 1
* (rows already at 0 need no change; other codes are left as they are).
replace vetstat = 0 if vetstat==1
replace vetstat = 1 if vetstat==2

* The pointer and child-count variables are no longer needed.
drop momloc poploc sploc nchild

* Attainment dummies from the educ code (6 and 10 are the cut points).
generate lHS = (educ<6)
generate HS = (educ==6)
generate scol = (educ>6 & educ<10)
generate col = (educ>=10 & educ!=.)

* English indicator: 1 for speakeng codes 2-5, missing for codes 0, 7, 8, 9,
* and 0 for everything else.
generate ewell = inlist(speakeng, 2, 3, 4, 5) if !inlist(speakeng, 0, 7, 8, 9)

* Labour-market status dummies.
generate emp = (empstat==1)
generate unemp = (empstat==2)
generate ninlf = (labforce!=2)

* Cumulative attainment dummies; left missing when educ is missing.
generate HSup = (educ>=6 & educ!=.) if educ!=.

generate colup = (educ>=10 & educ!=.) if educ!=.

* Core AHA sample: AHA-eligible and arrived 1989-1995.
local coreaha "vAHA>0 & vAHA!=. & yrimmig>=1989 & yrimmig<=1995"

* young: 1 for arrivals aged 14-17, 0 for arrivals aged 18-21, missing
* outside the core sample or outside both age bands.
generate young = inrange(ageimm, 14, 17) if (inrange(ageimm, 14, 17) | inrange(ageimm, 18, 21)) & `coreaha'
label variable young "Group of AHA arrivals (ages 14-17 or ages 18-21) in core group (immig '89-'95)"
label define young_lbl 0 "ages 18 to 21" 1 "ages 14 to 17", replace
label values young young_lbl

* youngdet: the same sample with the younger band split in two.
generate youngdet = 0 if inrange(ageimm, 18, 21) & `coreaha'
replace youngdet = 1 if inrange(ageimm, 14, 15) & `coreaha'
replace youngdet = 2 if inrange(ageimm, 16, 17) & `coreaha'
label variable youngdet "More specific groupings of AHA arrivals (ages 14-15, ages 16-17, or ages 18-21) in core group (immig '89-'95)"
label define youngdet_lbl 0 "ages 18 to 21" 1 "ages 14 to 15" 2 "ages 16 to 17"
label values youngdet youngdet_lbl

* State indicator for statefip code 6.
generate calif = (statefip==6)
label variable calif "Lives in California"

* gender is sex shifted to 0/1 (sex 1 -> 0 male, sex 2 -> 1 female);
* any other sex value is left missing.
generate gender = sex-1 if inlist(sex, 1, 2)
label variable gender "Gender"
label define gender_lbl 0 "male" 1 "female"
label values gender gender_lbl

* In-place recodes. Each group below is order-dependent: later lines test values produced by earlier lines, so the statements must not be reordered.

* ownershp: code 0 -> missing, code 2 -> 0, code 1 stays 1 (the last line is a no-op kept for readability).
replace ownershp=. if ownershp==0
replace ownershp=0 if ownershp==2
replace ownershp=1 if ownershp==1

* rentgrs: 0 -> missing first; then, for 1980 and 1990 only, a value of 1 is set to 0.
replace rentgrs=. if rentgrs==0
replace rentgrs=0 if (year==1980 | year==1990) & rentgrs==0001

* marrno: codes 0, 7, 8, 9 -> missing; code 1 -> 0; the remaining codes up to 6 -> 1.
* Because code 1 has already been moved to 0, the final line only affects codes 2 through 6.
replace marrno=. if inlist(marrno, 0, 7, 8, 9)
replace marrno=0 if marrno==1
replace marrno=1 if marrno>=1 & marrno <=6

* hcovany: code 1 -> 0 (no coverage), code 2 -> 1 (coverage), matching the labels defined below.
replace hcovany=0 if hcovany==1
replace hcovany=1 if hcovany==2
label var hcovany "Health insurance coverage status (1 indicates coverage)"
label define hcovany_lbl 0 "No health insurance coverage" 1 "With health insurance coverage", replace
label values hcovany hcovany_lbl

* regyr keeps the original sample year; year itself is then overwritten with
* the actual survey year wherever multyear is recorded.
generate regyr = year
replace year = multyear if multyear!=.
label variable regyr "Grouping of years (identified by the last year in grouping)"

* Keep level ("linear") copies of the money variables before logging them.
generate linrentg = rentgrs
generate linincwg = incwage
generate lininct = inctot
generate linincwf = incwelfr
generate linfinct = ftotinc
generate linvalueh = valueh

* Replace the originals with their natural logs.
foreach v of varlist incwage ftotinc inctot incwelfr avgincpp valueh rentgrs {
    replace `v' = ln(`v')
}

* Constant 1 for every row with a person weight.
generate ct = !missing(perwt)
label variable ct "Person count (wtd)"

*label variables
* Education
label variable lHS "Max education level is <HS degree"
label variable HS "Max education level is a HS degree"
label variable scol "Max education level is some college education"
label variable col "Max education level is college (4+ years)"
label variable HSup "Education level is HS degree or higher"
label variable colup "Education level is college (4+ years) educ or higher"
label variable yrseduc "Years of education"

* Language and labour market
label variable ewell "Speaks english well"
label variable emp "Employed (and in labor force)"
label variable unemp "Unemployed (and in labor force)"
label variable ninlf "Not in labor force"

* Income, rent and home value
label variable avgincpp "Avg. income per person in family"
label variable linincwg "Linear wage income"
label variable lininct "Linear total personal income"
label variable linincwf "Linear welfare income"
label variable incwfct "welfare income (0-1)"
label variable linrentg "Linear gross rent"
label variable linfinct "Linear family total income"
label variable linvalueh "Linear home value"

* Cluster-site status and its value label
label variable clusite "Cluster site status"
label define clusite_lbl 0 "Not cluster site" 1 "Cluster site", replace
label values clusite clusite_lbl

* Price-adjustment factor by sample year (regyr). Each factor is the
* year-specific multiplier times a common 1.553 rescaling. Years not listed
* get a missing factor, which turns every adjusted variable below into
* missing for those rows.
generate adjfac = .
replace adjfac = 2.295*1.553 if regyr==1980
replace adjfac = 1.344*1.553 if regyr==1990
replace adjfac = 1*1.553 if regyr==2000
replace adjfac = .774*1.553 if regyr==2009
replace adjfac = .715*1.553 if regyr==2014
replace adjfac = .644*1.553 if regyr==2019

* Level ("linear") variables are scaled multiplicatively.
foreach v of varlist linincwg lininct linfinct linincwf linrentg linvalueh {
    replace `v' = `v'*adjfac
}

* These variables were replaced by their natural logs earlier in this file,
* so the adjustment must be applied on the log scale:
* ln(x*adjfac) = ln(x) + ln(adjfac).
* Multiplying a log by adjfac (as was done before) does not deflate the
* underlying amount.
foreach v of varlist ftotinc incwage inctot avgincpp valueh incwelfr rentgrs {
    replace `v' = `v' + ln(adjfac)
}

compress

sort year metarea
sleep 1000

* Variables carried into the person-level analysis file. Each name is
* listed once, in order of first appearance in the original keep list
* (repeated names in a keep list have no additional effect).
keep year multyear regyr serial strata cluster hhwt pernum perwt ct ///
     compgrp agegrp agegrp3 young youngdet vAHA ageimm yrimmig age gender ///
     metarea puma calif statefip clusite setserv distca region ///
     ftotinc incwage inctot incwfct linincwg lininct linfinct linincwf linrentg linvalueh avgincpp ///
     marst valueh educ educd yrseduc HSup colup lHS HS scol col vi vd ///
     relate ewell empstat emp unemp labforce ninlf bpl bpld ancestr1 vvet ///
     famsize nfams nsibs nsubfam gq momhh dadhh sphh chhh chl5hh sibhh ///
     headhh sphhh child parent sib othrel nonrel nchlt5 wparent ///
     incwelfr poverty vetstat tranwork ownershp rentgrs hcovany race raced urban birthqtr ///
     imr imrx vimr vimrx marrno marsploc marnonbpl marnonanc marnatsp marcolsp ///
     imm vfAHA vtAHA vnAHA vncAHA vet military obsvnAHA obsvi ///
     f_natvvet f_vborn f_natvan m_vborn m_natvan m_van m_age f_age ///
     occ byr occdesc category

save ./temp/AllVietImmigResults_short.dta, replace

**create files with data by metarea**

* Work from the person-level data still in memory, keeping only what is needed for PUMA-level totals.
keep regyr year metarea puma vi vd perwt

* metarea code 0 is excluded from all metro-level files.
drop if metarea==0

* One row per regyr x metarea x puma: weighted Vietnamese immigrant and descent counts, and total weighted population.
collapse (sum) vi vd pumapop=perwt, by(regyr metarea puma)

* Combined Vietnamese count (immigrants plus descent).
g viorvd=vi+vd
drop vi vd

* Vietnamese share of each PUMA's population; saved as its own PUMA-level lookup file.
gen pcap_vpu=viorvd/pumapop
label var pcap_vpu "Share of the PUMA that is Vietnamese"
save ./temp/pcap_vpu.dta, replace

* Total Vietnamese count in the metro area for the same sample year.
bysort regyr metarea: egen msavpop=total(viorvd)

* Each PUMA's share of its metro area's Vietnamese count.
gen pc_vpuma=viorvd/msavpop
label var pc_vpuma "Share of the VNM in an MSA that are in this PUMA"

* Rank PUMAs within metro area from largest to smallest share (sorting on the negated share puts the largest first).
* NOTE(review): there is no tie-breaker in the sort, so when two PUMAs have the same share (or all shares are missing) which one gets rank 1 appears to depend on sort order -- confirm this is acceptable.
gen minuspc_vpuma = -pc_vpuma
bys regyr metarea (minuspc_vpuma): gen rank=_n
drop minuspc_vpuma
* npuma is the number of PUMA rows in the metro area.
bys regyr metarea: egen npuma=max(rank)
* Keep only the top-ranked PUMA, and only for metro areas with at least 5 PUMAs.
drop if npuma<5 | rank !=1
drop viorvd msavpop npuma rank

keep if regyr>=1980 & regyr<=2019
sleep 1000
* Result: at most one row per regyr x metarea, still carrying puma, pumapop, pcap_vpu and pc_vpuma.
save ./temp/maxvpumamapping90to19.dta, replace

**create MSA-level variables. prep data for regressions, descriptive tables**

* Reload the person-level file, restricted to survey years 1990-2019 and to
* the variables needed for the metro-level aggregates.
use regyr year metarea puma perwt clusite linfinct linincwg lininct linincwf incwfct linvalueh linrentg educ age statefip imr imrx vimr vimrx marst imm vi vd unemp labforce vAHA vfAHA vtAHA vnAHA vncAHA vvet vet military obsvnAHA obsvi ct ///
    if inrange(year, 1990, 2019) ///
    using ./temp/AllVietImmigResults_short.dta, clear

* maxpuma: Vietnamese immigrants whose own PUMA is the top-ranked PUMA of
* their metro area.
merge m:1 regyr metarea puma using ./temp/maxvpumamapping90to19.dta
generate maxpuma = (_merge==3 & vi>0)
drop _merge
label variable maxpuma "Is puma in its msa that has the highest % Vietnamese immigrants or descendants"

* denompuma: Vietnamese immigrants living in any metro area that has a
* top-ranked PUMA on file.
merge m:1 regyr metarea using ./temp/maxvpumamapping90to19.dta
generate denompuma = (_merge==3 & vi>0)
label variable denompuma "Viet. immig. who are in an MSA that has a max PUMA"
drop _merge

* College attainment among those aged 21 and over (numerator, denominator).
generate colov21 = (educ>=10 & educ!=. & age>=21 & age!=.)
label variable colov21 "Over 21 and college education or higher"
generate ov21 = (age>=21 & age!=.)

* Married Vietnamese immigrants.
generate vmar = (marst==1 & vi>0)
label variable vmar "Married Vietnamese immigrants"

drop if metarea==0

* Collapse to one row per regyr x metarea using perwt as frequency weights:
*   (mean)   weighted means of the level income, rent and home-value variables
*   (sum)    weighted sums of the 0/1 indicators (and of the other listed variables)
*   (rawsum) unweighted sums of variables that already carry perwt
collapse ///
    (mean)   msaftotinc=linfinct msaincwage=linincwg msainctot=lininct ///
             msaincwf=linincwf msavalueh=linvalueh msaincrent=linrentg ///
    (sum)    colov21 ov21 vimr vimrx imr imrx ///
             msamaxpuma=maxpuma msadenompuma=denompuma ///
             msaunemp=unemp msalf=labforce msaincwfct=incwfct ///
             mar=marst vmar ///
    (rawsum) imm vi vd vAHA vnAHA vvet vet ct msapop=perwt ///
    [fw=perwt], by(regyr metarea) fast

* Combined Vietnamese count (immigrants plus descent).
generate mviorvd = vi+vd

label variable msavalueh "msa home value"
label variable msaftotinc "msa family total income"
label variable msaincwage "msa wage income"
label variable msainctot "msa total personal income"
label variable msaincwf "msa welfare income"
label variable msaincwfct "msa welfare income (0-1)" 
label variable msaincrent "msa gross monthly rental cost of housing unit"
label variable msamaxpuma "by msa, percent of those in the max percent Viet. immig. or descend. puma in their msa"
label variable msadenompuma "by msa, Viet. immig. who are in an MSA with a max PUMA"
label variable msaunemp "msa unemployment"
label variable msalf "msa labor force participation"
label variable mviorvd "share of Vietnamese in msa" 
label variable msapop "msa population (wtd)"

* Turn the metro-level sums into rates.

* College share among those aged 21 and over.
replace colov21 = colov21/ov21
label variable colov21 "Percent of those over age 21 who are college educ. or higher"

* In-marriage rates among married Vietnamese immigrants (denominator vmar).
* The non-x versions use birthplace or ancestry; the x versions use
* birthplace only, so only the x versions are labelled "Strict".
generate mvimr = vimr/vmar
label variable mvimr "MSA VNM immigrant in-marriage rate (using bpl and ancestry), out of all married VNM"

generate mvimrx = vimrx/vmar
label variable mvimrx "Strict MSA VNM immigrant in-marriage rate (using bpl only), out of all married VNM"

* In-marriage rates among all married persons (denominator mar).
* The division is applied to mimr and mimrx themselves; previously it was
* applied to imr and imrx, which left mimr and mimrx as raw sums even though
* they are the variables merged into the person-level file and labelled as
* rates.
generate mimr = imr/mar
label variable mimr "MSA in-marriage rate (using bpl and ancestry), out of all married"

generate mimrx = imrx/mar
label variable mimrx "Strict MSA in-marriage rate (using bpl only), out of all married"

drop vmar mar

* Share of a metro area's Vietnamese immigrants who live in its top PUMA.
replace msamaxpuma = msamaxpuma/msadenompuma

* Per-capita versions of the remaining sums.
* NOTE(review): msalf is the weighted sum of the labforce variable itself
* (see the collapse above), so this ratio is the mean of the labforce codes
* rather than a share of participants -- confirm that is intended.
foreach v of varlist mviorvd msaunemp msalf msaincwfct {
    replace `v' = `v'/msapop
}
drop msadenompuma ov21

save ./temp/AllVietImmigResultswmet2013map_bymsa_forreg.dta, replace

**create files with data by hh**

use year serial perwt relate age educ ewell empstat labforce bpl vvet ///
    if inrange(year, 1990, 2019) ///
    using ./temp/AllVietImmigResults_short.dta, clear

* Characteristics of the household head (relate==1); missing on all other
* rows so that the max() in the collapse below picks up the head's value.
generate hhhed = educ if relate==1
generate hhheng = ewell if relate==1
* Head's employment status, only when the head has labforce==2.
generate hhhestat = empstat if relate==1 & labforce==2
generate hhhbpl = bpl if relate==1
* Birthplace of the head's spouse (relate==2).
generate shhhbpl = bpl if relate==2

* 1 if the oldest household member is over 30, 0 if not; stays missing when
* no age is available.
bysort year serial: egen hhad30up = max(age)
replace hhad30up = (hhad30up>30) if hhad30up!=.
label variable hhad30up "Lives with adult over age 30"

drop relate ewell empstat bpl

* One row per household, taking the maximum of each variable.
collapse (max) maxedhh=educ hhhed hhheng hhhestat hhvvet=vvet hhhbpl shhhbpl hhad30up, by(year serial) fast
label variable hhvvet "Vietnam War veteran present in hh"

save ./temp/AllVietImmigResultswmet2013map_byhh_forreg.dta, replace

**Merge data into master file**

use ./temp/AllVietImmigResults_short.dta, clear

* Metro-level aggregates; rows found only in the aggregate file are dropped.
merge m:1 regyr metarea using ./temp/AllVietImmigResultswmet2013map_bymsa_forreg.dta, keepusing(msaftotinc msaincwage msainctot msaincwf msavalueh msaincrent colov21 msapop msamaxpuma msaincwfct mvimr mvimrx mimr mimrx mviorvd)
drop if _merge==2
drop _merge

* Household-level aggregates.
merge m:1 year serial using ./temp/AllVietImmigResultswmet2013map_byhh_forreg.dta, keepusing(maxedhh hhhed hhheng hhhestat hhvvet hhhbpl shhhbpl hhad30up)
drop if _merge==2
drop _merge

* Metro areas that have a top-ranked PUMA on file: maxpuma starts at 0 and
* denompuma at 1 for everyone living there; both stay missing elsewhere.
* This merge also brings in the mapping file's other columns (pumapop,
* pcap_vpu, pc_vpuma), which describe the top-ranked PUMA rather than the
* person's own PUMA.
merge m:1 regyr metarea using ./temp/maxvpumamapping90to19.dta
drop if _merge==2
generate maxpuma = 0 if _merge==3
generate denompuma = 1 if _merge==3
drop _merge

* Persons whose own PUMA is the top-ranked one get maxpuma = 1.
merge m:1 regyr metarea puma using ./temp/maxvpumamapping90to19.dta
drop if _merge==2
replace maxpuma = 1 if _merge==3
drop _merge
label variable denompuma "Viet. immig. who are in an MSA with a max PUMA"

* Own-PUMA Vietnamese share. The pcap_vpu column pulled in by the metro-level
* mapping merge above holds the top-ranked PUMA's share, and a merge without
* the update option never overwrites an existing master variable, so that
* stale copy has to be removed before the PUMA-level value can be attached.
* _merge from this merge is deliberately left in place: it is dropped
* further down, just before the 1990 AHA wage file is merged.
drop pcap_vpu
merge m:1 regyr metarea puma using ./temp/pcap_vpu.dta, keepusing(regyr metarea puma pcap_vpu)

* Household attainment dummies from the household maximum of educ
* (6 and 10 are the cut points).
generate hhlHS = (maxedhh<6)
label variable hhlHS "Max education of household is <HS degree"
generate hhHS = (maxedhh>=6 & maxedhh<10)
label variable hhHS "Max education of household is a HS degree"

* Same cut points applied to the household head's educ.
generate hhhlHS = (hhhed<6)
label variable hhhlHS "Max education of head of household is <HS degree"

generate hhhHS = (hhhed>=6 & hhhed<10)
label variable hhhHS "Max education of head of household is a HS degree"

generate hhcol = (maxedhh>=10 & maxedhh!=.)
label variable hhcol "Max education of household is college or above"

* Head or head's spouse has a birthplace code of 120 or below.
generate hhnat = (hhhbpl<=120 | shhhbpl<=120)
label variable hhnat "Head of household or spouse of head of hh is US born native"

generate hhhcol = (hhhed>=10 & hhhed!=.)
label variable hhhcol "Head of household is col educated"

generate hhhewell = hhheng
label variable hhhewell "Head of household speaks English well"

generate hhhemp = (hhhestat==1)
label variable hhhemp "Head of household is employed"

generate hhhunemp = (hhhestat==2)
label variable hhhunemp "Head of household is unemployed"

* Reduce the household maximum of the weighted veteran flag to 0/1
* (missing counts as 0).
replace hhvvet = (hhvvet>0 & hhvvet!=.)

drop hhhbpl hhhed hhheng hhhestat maxedhh
generate gqhh = (gq==1)
label variable gqhh "Household is in group quarters"

* Keep level ("linear") copies of the metro-level means before logging them.
generate linmvalh = msavalueh
generate linmfinct = msaftotinc
generate linmincwg = msaincwage
generate linminct = msainctot
generate linmincwf = msaincwf
generate linmincr = msaincrent
label variable linmvalh "Linear msa home value"
label variable linmfinct "Linear msa family total income"
label variable linmincwg "Linear msa wage income"
label variable linminct "Linear msa total personal income"
label variable linmincwf "Linear msa welfare income"
label variable linmincr "Linear msa gross monthly rental cost of housing unit"

* Replace the metro-level means with their natural logs.
foreach v of varlist msaftotinc msaincwage msainctot msaincwf msavalueh msaincrent {
    replace `v' = ln(`v')
}

label variable maxpuma "In max Vietnamese segregated puma"

* Build a household-level file holding the summed level wage income of the
* focal AHA groups (compgrp 0 or 1) in the 1990 sample, without disturbing
* the data in memory.
preserve
use year compgrp serial age linincwg regyr cluster ///
    if regyr==1990 & inlist(compgrp, 0, 1) ///
    using ./temp/AllVietImmigResults_short.dta, clear
sort serial
collapse (sum) totahawg=linincwg, by(serial regyr)
save ./temp/1990ahawagebyhh.dta, replace
restore

* Clear the merge marker left by the previous merge, then attach the totals.
drop _merge
merge m:1 serial regyr using ./temp/1990ahawagebyhh.dta
drop _merge
label variable totahawg "Focal AHA immigrants' linear wage income"

* Family income net of the focal AHA wages, floored at zero. Rows with no
* matching household total have missing totahawg and therefore stay missing.
generate linfinct_noaha = cond(linfinct-totahawg<0, 0, linfinct-totahawg)
label variable linfinct_noaha "Linear family total income excluding AHA"

* Strata identifiers are combined with the sample year so that each
* regyr x strata pair is its own stratum.
egen superstrat = group(regyr strata)

* Declare the survey design: cluster as the sampling unit, perwt as the
* probability weight, superstrat as the strata.
svyset cluster [pw=perwt], strata(superstrat)

sort regyr year

compress

* Write the final dataset first. Previously the intermediate files were
* erased before this save, so a failing erase (for example a missing or
* locked file) aborted the script before the result was ever written.
save ./temp/CoreData.dta, replace

**erase temp files not needed
local temp_files sploc poploc momloc maxvpumamapping90to19 AllVietImmigResultswmet2013map_bymsa_forreg AllVietImmigResultswmet2013map_byhh_forreg 1990ahawagebyhh pcap_vpu AllVietImmigResults_short

foreach f of local temp_files {
    erase ./temp/`f'.dta
}

*** End of program
log close
